Project Aim/Goal MoT are looking to adapt traficalmr utilities and methodologies for New Zealand using open data to understand road safety outcomes
Project Description Adapting UK’s trafficalmr to the New Zealand context and enable policy.traffic calming intervention analyses.
This project involves data processing with various data such as geospatial datasets, APIs and census datasets.
The policy analysis will use the developed data foundations to estimate the casualty rate per billion kilometres for walking and cycling during journey to work commutes.
Project Outcomes - R functions for processing CAS, census journet to work and, transport relevant openstreetmap data using trafficalmr as a guide. - Estimating causalty rates per billion kilometers for cycling and walking
https://opendata-nzta.opendata.arcgis.com/datasets/8d684f1841fa4dbea6afaefc8a1ba0fc_0
https://www.openstreetmap.org/
The 2018 Census commuter view dataset contains the employed census usually resident population count aged 15 years and over by statistical area 2 for the main means of travel to work variable from the 2018 census. The geography corresponds to 2018 boundaries
This 2018 Census commuter view dataset is displayed by statistical area 2 geography and contains from-to (journey) information on an individual’s usual residence and workplace address by main means to travel of work
Workplace address definition - coded from information supplied by respondents about their workplaces. Where respondents do not supply sufficient information, their responses are coded to ‘no further defined’. The 2018 Census commuter view datasets excludes these ‘not further defined’ areas, as such the sum of the counts for each region in this dataset may not be equal to the total employed census usually resident population count aged 15 years and over for that region.
As noted by Shriv the columns X and Y in CAS data is a GeometryTrype Type. A spatial dataset and the crs is 2193 (usually used 4326). 2193 is a standard specifically for NZ https://epsg.io/2193.
The CAS data consists of the count of object and vehicle type associate to each crash (per row).
Is there a need to check the interaction other attributes in the data such as weather using regression?
Crash data consists of crashes from 2000 to 2021.
## Loading the cast data
#install.packages("iotools")
if (!file.exists("Data/Crash_Analysis_System_(CAS)_data.rds")) {
cas.data <- iotools::read.csv.raw("Data/Crash_Analysis_System_(CAS)_data.csv")
saveRDS(cas.data, file="Data/Crash_Analysis_System_(CAS)_data.rds")
} else {cas.data <- readRDS("Data/Crash_Analysis_System_(CAS)_data.rds")}
## Warning in readRDS("Data/Crash_Analysis_System_(CAS)_data.rds"): strings not
## representable in native encoding will be translated to UTF-8
## link to cas data
#https://opendata-nzta.opendata.arcgis.com/datasets/8d684f1841fa4dbea6afaefc8a1ba0fc_0/explore?location=-9.962041%2C0.000000%2C2.20
#size - 760k rows and 72 columns
dim(cas.data)
## [1] 758757 72
#preview the data
#head(cas.data)
#check the data types in the data
#str(cas.data)
#vehicle columns
cas.vehicle <- c('bicycle',
'bus',
'carStationWagon',
'moped',
'motorcycle',
'otherVehicleType',
'schoolBus',
'suv',
'taxi',
'train',
'truck',
'unknownVehicleType',
'vanOrUtility',
'vehicle')
#Histogram of number of vehicles involve in the crash
#Proportion 1 vechicle crashes involved objects such as tree, cliff and etc.
hist(rowSums(cas.data[cas.vehicle], na.rm = TRUE), main = "Histogram of number of vehicles involve in the crash", ylab = 'no. of crashes', xlab = 'no. of vehicle in the crash')
#Vehicle type crashes
op <- par(mar = c(10,4,2,1) + 0.1)
barplot(colSums(cas.data[cas.vehicle], na.rm = TRUE), las=2, main = '2000 to 2021 Crashes by Vehicle Type')
par(op)
colSums(cas.data[cas.vehicle], na.rm = TRUE)
## bicycle bus carStationWagon moped
## 22105 12135 1005046 5466
## motorcycle otherVehicleType schoolBus suv
## 27058 3519 519 77470
## taxi train truck unknownVehicleType
## 8261 475 61278 1687
## vanOrUtility vehicle
## 130700 8039
serious.injury.per.year <- aggregate(cas.data$seriousInjuryCount, by = list(Crash.Year = cas.data$crashYear), FUN = sum, na.rm = T)
minor.injury.per.year <- aggregate(cas.data$minorInjuryCount, by = list(Crash.Year = cas.data$crashYear), FUN = sum, na.rm = T)
fatality.per.year <- aggregate(cas.data$fatalCount, by = list(Crash.Year = cas.data$crashYear), FUN = sum, na.rm = T)
#plotting the number of crashes
plot(table(cas.data$crashYear), col = 4, ty = 'o', ylab = "number of crashes", main = "The count of crashes per year")
#serious injury crashes
lines(fatality.per.year$Crash.Year, fatality.per.year$x, col = "red", ty = 'o')
#serious injury crashes
lines(serious.injury.per.year$Crash.Year, serious.injury.per.year$x, col = "orange", ty = 'o')
#minor injury crashes
lines(minor.injury.per.year$Crash.Year, minor.injury.per.year$x, col = "purple", ty = 'o')
legend(2008, -10000, legend = c("Total Crashes", "Fatalities", "Serious Injury", "Minor injury"),col=c("blue", "red", "orange", "purple"), bty='n', xpd=NA, horiz=TRUE, xjust=0.5, lwd=c(2,2,2,2))
#average serious injury from crash per year
mean(serious.injury.per.year$x)
## [1] 2370.136
#year with the highest crash
max_val <- max(table(cas.data$crashYear))
#Year with the highest crashes
table(cas.data$crashYear)[table(cas.data$crashYear) == max_val]
## 2007
## 41661
###STATS NZ Journey to work data
#load journey to work data
journey.to.work <- read.csv("Data/2018-census-main-means-of-travel-to-work-by-statistical-a.csv", fileEncoding="UTF-8-BOM")
#size - 50870 19
dim(journey.to.work)
## [1] 50870 19
#columns
# WARNING: noticed column names is different for different PCs - mac vs windows
#names(journey.to.work)
#columns and data type
#str(journey.to.work)
#proportion of people that works the same area
journey.to.work$same.area.boolean <- journey.to.work[[1]] == journey.to.work[[5]]
same.area.df <- aggregate(journey.to.work$Total, by=list(same.area=journey.to.work$same.area.boolean), FUN=sum)
barplot(height=same.area.df$x, names=c('different area', 'same area'), col="#69b3a2", main = 'Commuter Workplace vs. Residence Location')
#74% of the commuter work on different area from its residential area
same.area.df$x[1]/sum(same.area.df$x)
## [1] 0.7391436
same.area.df$x[2]/sum(same.area.df$x)
## [1] 0.2608564
#replacing -999 with 0
journey.to.work[journey.to.work == "-999"] <- 0
#most commuters drive a private car/truck/van
op <- par(mar = c(18,4,2,1) + 0.1)
barplot(colSums(journey.to.work[, 9:18]), las = 2)
par(op)
colSums(journey.to.work[, 9:18])
## Work_at_home
## 290973
## Drive_a_private_car_truck_or_van
## 821394
## Drive_a_company_car_truck_or_van
## 101523
## Passenger_in_a_car_truck_van_or_company_bus
## 20628
## Public_bus
## 44127
## Train
## 22896
## Bicycle
## 15207
## Walk_or_jog
## 72954
## Ferry
## 2661
## Other
## 6828
##loading the data
sa2.centroid.inside <- read.csv("Data/statistical-area-2-2018-centroid-inside.csv", fileEncoding="UTF-8-BOM")
#names(sa2.centroid.inside)
##joining long and lat data from 'sa2 centroid inside' data with journey to work data
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
#sa2.centroid.inside[c("SA22018_V1_00", "LATITUDE", "LONGITUDE")]
## joining coordinates (geocode) of usual residence
df <- journey.to.work %>%
left_join(sa2.centroid.inside[c("SA22018_V1_00", "LATITUDE", "LONGITUDE")], by = c("SA2_code_usual_residence_address" = "SA22018_V1_00")) %>%
rename(usual.residence.LATITUDE = LATITUDE, usual.residence.LONGITUDE = LONGITUDE)
## joining coordinates (geocode) of usual workplace
df2 <- df %>%
left_join(sa2.centroid.inside[c("SA22018_V1_00", "LATITUDE", "LONGITUDE")], by = c("SA2_code_workplace_address" = "SA22018_V1_00")) %>%
rename(usual.work.LATITUDE = LATITUDE, usual.work.LONGITUDE = LONGITUDE)
#names(df2)
#dim(df2)
#install.packages("geosphere")
library(geosphere)
#getting the distance between two points using Haversine formula in meters
df2$dist <- distHaversine(df2[c("usual.residence.LONGITUDE", "usual.residence.LATITUDE")], df2[c("usual.work.LONGITUDE", "usual.work.LATITUDE")])
hist(df2[df2$dist > 0, ]$dist/1e3, breaks=1000, xlim=c(0,50), main = "Histogram of distance travelled by commuter from home to work (km)", xlab = 'distance travelled (km)')
#total serious injury and
serious_injury_and_fatal_crashes <- sum(cas.data[cas.data$crashYear %in% 2016:2019, ][c("seriousInjuryCount", "fatalCount")], na.rm = T)
total_dist_travelled <- sum(df2$dist)
#KSI/bkm in crashes between 2016 to 2019
serious_injury_and_fatal_crashes/(total_dist_travelled/1e9)
## [1] 22250.79
#KSI/bkm in crashes between 2018
sum(cas.data[cas.data$crashYear == 2018, ][c("seriousInjuryCount", "fatalCount")], na.rm = T)/(total_dist_travelled/1e9)
## [1] 5543.153
Note there are different libraries to access openstreet map, either using ggmap, mapview, OpenStreetMap.
# this function was from trafficalmr
#remotes::install_github("saferactive/trafficalmr")
tc_get_osm = function(bbox = NULL, value = NULL, output = "osm_points") {
res = osmdata::osmdata_sf(
osmdata::add_osm_feature(
opq = osmdata::opq(bbox = bbox),
key = "traffic_calming",
value = value,
value_exact = TRUE
)
)
res[[output]]
}
## function used to get traffic calming interventions from OpenStreetMap
traffic_calming_points = tc_get_osm(bbox = "Auckland New Zealand") #tc_get_osm is from osm_data
mapview::mapview(traffic_calming_points["traffic_calming"])
## convert from NZTM projection (EPSG:2193) to lat/lon
## https://www.linz.govt.nz/data/linz-data-service/guides-and-documentation/using-lds-xyz-services-in-leaflet
#install.packages("proj4")
loc = proj4::project(cas.data[1:2], "+proj=tmerc +lat_0=0 +lon_0=173 +k=0.9996 +x_0=1600000 +y_0=10000000 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs", inv=TRUE)
#install.packages("snippets",,"http://rforge.net/",type="source")
library(snippets)
#install.packages("png")
library(png)
par(mar=rep(0,4))
plot(loc$x, loc$y, pch='.', asp=1/cos(-40.6/180*pi), ty='n')
#overlay with oopen street map
osmap()
points(loc$x, loc$y, pch='.',col = 'red')
#auckland geo spatial boundaries
q = list(x = c(174.149667174548, 175.383632470626), y = c(-36.554874067828,
-36.991016113938))
akl = loc$x > range(q$x)[1] & loc$x < range(q$x)[2] & loc$y > range(q$y)[1] & loc$y < range(q$y)[2] & cas.data$crashYear %in% 2016:2019
akl[is.na(akl)] = FALSE
sum(akl)
## [1] 30624
mean(akl)
## [1] 0.04036075
#usual residence location points
g = aggregate(df2$Total, list(lon=df2$usual.residence.LONGITUDE,lat=df2$usual.residence.LATITUDE), sum)
plot(0, 0, pch='.', xlim=range(q$x), ylim=range(q$y), asp=1/cos(-40.6/180*pi), ty='n')
#usual work location points
g2 = aggregate(df2$Total, list(lon=df2$usual.work.LONGITUDE,lat=df2$usual.work.LATITUDE), sum)
#plotting it
par(mar=rep(0,4))
plot(0, 0, pch='.', xlim=range(q$x), ylim=range(q$y), asp=1/cos(-40.6/180*pi), ty='n')
snippets::osmap()
#RED - usual work,
points(g$lon, g$lat,pch=19,cex=sqrt(g$x/max(g$x))*1.2,col="#ff000040")
points(g2$lon, g2$lat,pch=19,cex=sqrt(g2$x/max(g2$x))*1.5,col="#0000ff80")
legend("topright", legend=c("Residence", "Work"),
col=c("red", "blue"), pch = 16, cex=1, box.col="white")
ga = g[g$lon > range(q$x)[1] & g$lon < range(q$x)[2] & g$lat > range(q$y)[1] & g$lat < range(q$y)[2], ]
## warning ! bad coordinates - since lat/lon aspect ration is not 1!! jsut a hack ;)
#used the deldir package
#installed.packages("deldir")
d = deldir::deldir(ga$lon, ga$lat)
plot(0, 0, pch='.', xlim=range(q$x), ylim=range(q$y), asp=1/cos(-40.6/180*pi), ty='n')
snippets::osmap()
points(g$lon, g$lat,pch=19,cex=sqrt(g$x/max(g$x))*1.2,col="#ff000040")
points(g2$lon, g2$lat,pch=19,cex=sqrt(g2$x/max(g2$x))*1.5,col="#0000ff80")
plot(d, add=TRUE, wlines="tess", pch='.')